Heatmaps

Heatmaps

History

History

In-store shopping pattern

Eye-tracking studies

Demand

Frequency counts

Gene Expression

rows: genes

columns: samples

color: change in gene expression level

Source: https://warwick.ac.uk/fac/sci/moac/people/students/peter_cock/r/heatmap/

Geographic pattern

heat maps

numerical –> categorical

Wine dataset

Facet on type

Drawing heatmaps with ggplot2

geom_tile with numerical data, compare to geom_point

x <- 1:3
y <- c(5, 2, 7)
df <- data.frame(x, y)
g1 <- ggplot(df, aes(x, y)) + geom_point()
g2 <- ggplot(df, aes(x, y)) + geom_tile()
grid.arrange(g1, g2, nrow = 1)

geom_tile with variable w, h

df$w <- c(1.4, .4, .2)
df$h <- c(.5, 1.3, .8)
ggplot(df, aes(x, y)) + geom_tile(aes(width = w, height = h))

geom_rect uses (xmin, xmax, ymin, ymax) instead

xmin <- 1:3
xmax <- 2:4
ymin <- c(5, 2, 7)
ymax <- c(6, 3, 8)
df <- data.frame(xmin, xmax, ymin, ymax)
ggplot(df, aes(xmin = xmin, xmax = xmax, ymin = ymin,
               ymax = ymax)) + geom_rect()

force squares

ggplot(df, aes(xmin = xmin, xmax = xmax, ymin = ymin,
               ymax = ymax)) + geom_rect() + coord_fixed()

geom_raster – same as geom_tile w/ uniform w, h & FASTER

x <- 1:3
y <- c(5, 2, 7)
df <- data.frame(x, y)

ggplot(df, aes(x,y)) + geom_raster()

change x & y: what happens to the size of the tiles?

x <- 1:2
y <- c(5, 2)
df <- data.frame(x, y)
ggplot(df, aes(x, y)) + geom_raster()

change x & y again

x <- 1:4
y <- c(5, 2, 10, 7)
df <- data.frame(x, y)
ggplot(df, aes(x, y)) + geom_raster()

complete set of (x, y) pairs

x <- c(1, 1, 1, 2, 2, 2, 3, 3, 3)
y <- c(1, 2, 3, 1, 2, 3, 1, 2, 3)
df <- data.frame(x, y)
ggplot(df, aes(x, y)) + geom_raster()

add color

set.seed(2017)
df$z <- sample(9)
ggplot(df, aes(x, y)) + geom_raster(aes(fill = z))

What if z is categorical?

df$z <- c("A", "B", "C", "B", "A", "A", "B", "C", "B")
ggplot(df, aes(x, y)) + geom_raster(aes(fill = z))

What if z is discrete (numerical)?

df$z <- sample(3, 9, replace = TRUE)
ggplot(df, aes(x, y)) + geom_raster(aes(fill = z))

df$z <- factor(df$z)
ggplot(df, aes(x, y)) + geom_raster(aes(fill = z))

create a heat map theme

theme_heat <- theme_classic() +
  theme(axis.line = element_blank(),
        axis.ticks = element_blank())

ggplot(df, aes(x, y)) + geom_raster(aes(fill = z)) + 
  theme_heat

add coord_fixed

ggplot(df, aes(x, y)) + geom_raster(aes(fill = z)) + 
  coord_fixed() + theme_heat

add white border

ggplot(df, aes(x, y)) + 
  geom_tile(aes(fill = z), color = "white") + 
  coord_fixed() + theme_heat

(doesn’t work with geom_raster())

x & y are categorical

grade <- rep(c("first", "second", "third"), 3)
subject <- rep(c("math", "reading", "gym"), each = 3)
statescore <- sample(50, 9) + 50
df <- data.frame(grade, subject, statescore)

ggplot(df, aes(grade, subject, fill = statescore)) + 
  geom_tile(color = "white") +
  coord_equal() + theme_heat

problem with order of categories

grade <- rep(c("first", "second", "third", "fourth"), 3)
subject <- rep(c("math", "reading", "gym"), each = 4)
statescore <- sample(50, 12) + 50
df <- data.frame(grade, subject, statescore)

ggplot(df, aes(grade, subject, fill = statescore)) + 
  geom_tile(color = "white") +
  coord_equal() + theme_heat

fix order

df$grade <- forcats::fct_relevel(df$grade, "fourth", after = Inf)

ggplot(df, aes(grade, subject, fill = statescore)) + 
  geom_tile(color = "white") +
  coord_equal() + theme_heat

Compare Japan, the UK, and the US in terms of occupational mobility.

library(vcdExtra)
library(dplyr)
orderedclasses <- c("Farm", "LoM", "UpM", "LoNM", "UpNM")
mydata <- Yamaguchi87
mydata$Son <- factor(mydata$Son, levels = orderedclasses)
mydata$Father <- factor(mydata$Father,
                        levels = orderedclasses)
japan <- mydata %>% filter(Country == "Japan")
uk <- mydata %>% filter(Country == "UK")
us <- mydata %>% filter(Country == "US")

Compare Japan, the UK, and the US in terms of occupational mobility.

Japan

ggplot(japan, aes(x = Father, y = Son)) + 
  geom_tile(aes(fill = Freq), color = "white") + 
  coord_fixed() + theme_heat

Compare Japan, the UK, and the US in terms of occupational mobility.

UK

ggplot(uk, aes(x = Father, y = Son)) + 
  geom_tile(aes(fill = Freq), color = "white") + 
  coord_fixed() + theme_heat

Compare Japan, the UK, and the US in terms of occupational mobility.

US

ggplot(us, aes(x = Father, y = Son)) + 
  geom_tile(aes(fill = Freq), color = "white") + 
  coord_fixed() + theme_heat

All

ggplot(mydata, aes(x = Father, y = Son)) +
  geom_tile(aes(fill = Freq), color = "white") +
  coord_fixed() + facet_wrap(~Country) + theme_heat

All, as % of country total

mydata2 <- mydata %>% group_by(Country) %>% 
  mutate(Total = sum(Freq)) %>% ungroup()

ggplot(mydata2, aes(x = Father, y = Son)) +
  geom_tile(aes(fill = Freq/Total), color = "white") +
  coord_fixed() + facet_wrap(~Country) + theme_heat

All, as % of country and class total

mydata3 <- mydata %>% group_by(Country, Father) %>% 
  mutate(Total = sum(Freq)) %>% ungroup()

g <- ggplot(mydata3, aes(x = Father, y = Son)) +
  geom_tile(aes(fill = Freq/Total), color = "white") +
  coord_fixed() + facet_wrap(~Country) + theme_heat
g

g + geom_text(aes(label = round(Freq/Total, 1)),
              color = "white")

Yamaguchi87

grid.arrange(g, g + scale_fill_viridis_c(),
             nrow = 2)

g + scale_fill_viridis_c(direction = -1)

g + scale_fill_viridis_c(end = .9)

RColorBrewer

g + scale_fill_distiller(palette = "RdBu")

ggplot(mydata3, aes(x = Father, y = Son)) +
  geom_tile(aes(fill = (Freq/Total)), color = "white") +
  coord_fixed() + 
  scale_fill_gradient2(low = "black", mid = "white",
                        high = "red", midpoint = .2) +
  facet_wrap(~Country) + theme_heat